# =============================================================================
# Custom Census Import Functions.
# =============================================================================
from __future__ import absolute_import, division, print_function, unicode_literals

from functools import reduce
import pandas as pd
import numpy as np
from collections import OrderedDict
import requests


def lint(a, b, method='fast'):
    """Return the elements of `a` that also occur in `b` (list intersection).

    Args:
        a (iterable): Candidate elements. The result follows the order of `a`.
        b (iterable): Elements to test membership against.
        method (str, optional): 'fast' (default) uses hashing and returns each
            common element once, in order of first appearance in `a`. 'slow'
            scans `b` for every element of `a` and keeps duplicates from `a`.

    Returns:
        list: The common elements, or None if `method` is neither 'fast' nor 'slow'.
    """
    if method == 'fast':
        try:
            lookup = set(b)
            seen = set()
            common = []
            for v in a:
                # Keep first occurrences only so the result has set semantics
                # but a reproducible order (a bare set() iterates arbitrarily).
                if v in lookup and v not in seen:
                    seen.add(v)
                    common.append(v)
            return common
        except TypeError:
            # Unhashable elements cannot go into a set; fall back to a linear scan.
            return [v for v in a if (v in b)]
    if method == 'slow':
        return [v for v in a if (v in b)]
    # Unknown method: the historical behaviour is to return None rather than raise.
    return None

def _download(src, year, params, baseurl = 'https://api.census.gov/data/', endpt = ''):
    """Request data from Census API and return the decoded JSON response. Called by `geographies()` and `download()`.

    Args:
        src (str): Census data source: 'acs1' for ACS 1-year estimates, 'acs5' for ACS 5-year estimates, 'acs3' for
            ACS 3-year estimates, 'acsse' for ACS 1-year supplemental estimates, 'sf1' for SF1 data.
        year (int): Year of data.
        params (dict): Download parameters. Keys and values must be strings; they are joined into
            the query string as given, without URL-encoding.
        baseurl (str, optional): Base URL for download.
        endpt (str, optional): Allows override of whether old or new API endpoint is used. Specify
            'old' for old, 'new' for new, '' to use default. This option generally shouldn't
            need to be specified but can be helpful if download problems are encountered.
            Only consulted for sources starting with 'acs1', 'acs5', 'acsse' or 'sf1'.

    Returns:
        The decoded JSON body of the response, unmodified. `download()` consumes it as a list of
        rows whose first row holds the column names.

    Raises:
        ValueError: If `endpt` is not one of 'old', 'new', '' for a source that uses it, or if the
            response body cannot be decoded as JSON.
    """
    if src[:4] == 'acs1' or src[:4] == 'acs5' or src[:5] == 'acsse':
        if endpt == 'new': presrc = 'acs/'
        elif endpt == 'old': presrc = ''
        elif endpt == '': presrc = 'acs/' if year >= 2010 else ''
        else: raise ValueError('Unknown endpt {0!r}: expected "old", "new" or ""'.format(endpt))
    elif src[:3] == 'sf1':
        if endpt == 'new' or endpt == '': presrc = 'dec/'
        elif endpt == 'old': presrc = ''
        else: raise ValueError('Unknown endpt {0!r}: expected "old", "new" or ""'.format(endpt))
    else: presrc = ''
    # The query string is assembled by hand so that characters such as ':', '*' and '+'
    # in the geography parameters reach the API exactly as written.
    query = '&'.join('='.join(param) for param in params.items())
    url = baseurl + str(year) + '/' + presrc + src + '?' + query
    r = requests.get(url)
    try:
        data = r.json()
    except ValueError:
        # Non-JSON bodies (error pages, empty responses) are surfaced with the URL and raw text.
        raise ValueError('Unexpected response (URL: {0.url}): {0.text} '.format(r))
    return data

def geographies(within, src, year, key=None, endpt=''):
    """List geographies within a given geography, e.g., counties within a state.

    Args:
        within (censusgeo): Geography within which to list geographies.
        src (str): Census data source: 'acs1' for ACS 1-year estimates, 'acs5' for ACS 5-year estimates, 'acs3' for
            ACS 3-year estimates, 'acsse' for ACS 1-year supplemental estimates, 'sf1' for SF1 data.
        year (int): Year of data.
        key (str, optional): Census API key.
        endpt (str, optional): Allows override of whether old or new API endpoint is used. Specify
            'old' for old, 'new' for new, '' to use default. This option generally shouldn't
            need to be specified but can be helpful if download problems are encountered.

    Returns:
        dict: Dictionary with names as keys and `censusgeo` objects as values.

    Raises:
        ValueError: If the response has no 'NAME' column.

    Examples::

        # Pull data on all state geographies from the ACS 2011-2015 5-year estimates.
        censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2015)
    """
    params = {'get': 'NAME'}
    params.update(within.request())
    if key is not None: params.update({'key': key})
    # `_download` hands back the decoded JSON as rows: a header row of column names
    # followed by one row per geography (the same shape `download()` consumes).
    rows = _download(src, year, params, endpt=endpt)
    if not rows:
        return {}
    header = rows[0]
    if 'NAME' not in header:
        raise ValueError('Census API response does not include a NAME column: {0}'.format(header))
    name_col = header.index('NAME')
    # Every column other than NAME is a geographic component, kept in response order.
    geo_cols = [(j, col) for j, col in enumerate(header) if j != name_col]
    return {row[name_col]: censusgeo([(col, row[j]) for j, col in geo_cols]) for row in rows[1:]}


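# Scratch inputs for stepping through the body of download() by hand. Nothing
# below is executed; `acs_vars`, `api_key` and `censusd2` are not defined in the
# visible part of this file.
# acs_fields = ['B01001_001E','B01001_002E','B01001_026E']
# y=2013
# acs_fields=acs_vars[acs_vars.depth<=y]['Variable'].tolist()
# src='acs5'
# year=y
# geo=censusd2.censusgeo([('zip code tabulation area', '*')])
# var=acs_fields
# key=api_key
# tabletype='detail'
# endpt=''
# baseurl = 'https://api.census.gov/data/'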

def download(src, year, geo, var, key=None, tabletype='detail', endpt=''):
    """Download data from Census API.

    Variables are requested in chunks and the per-chunk tables are outer-merged on the
    columns they share (the geography name and geographic identifiers).

    Args:
        src (str): Census data source: 'acs1' for ACS 1-year estimates, 'acs5' for ACS 5-year estimates, 'acs3' for
            ACS 3-year estimates, 'acsse' for ACS 1-year supplemental estimates, 'sf1' for SF1 data.
        year (int): Year of data.
        geo (censusgeo): Geographies for which to download data.
        var (list of str): Census variables to download.
        key (str, optional): Census API key.
        tabletype (str, optional): Type of table from which variables are drawn (only applicable to ACS data). Options are 'detail' (detail tables),
            'subject' (subject tables), 'profile' (data profile tables), 'cprofile' (comparison profile tables).
        endpt (str, optional): Allows override of whether old or new API endpoint is used. Specify
            'old' for old, 'new' for new, '' to use default. This option generally shouldn't
            need to be specified but can be helpful if download problems are encountered.

    Returns:
        pandas.DataFrame: Data frame with columns corresponding to designated variables, and row index of censusgeo objects representing Census geographies.
        The frame is empty (columns `var`, no rows) when `var` is empty or no request returned any data rows.

    Raises:
        ValueError: If unknown tabletype is specified.

    Examples::

        # Download ACS 2011-2015 5-year estimates for Oakland city, California on population size, median age, and median household income.
        censusdata.download('acs5', 2015, censusdata.censusgeo([('state', '06'), ('place', '53000')]), ['B01001_001E', 'B01002_001E', 'B19013_001E'])
    """
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if tabletype not in ('detail', 'subject', 'profile', 'cprofile'):
        raise ValueError('Unknown table type {0}!'.format(tabletype))
    # Detail tables live directly under the source; other table types are a sub-path.
    tablepath = '' if tabletype == 'detail' else '/' + tabletype
    georequest = geo.request()
    # NOTE(review): presumably sized so that 'NAME' plus one chunk stays within the
    # Census API's per-request variable limit -- confirm against the API documentation.
    chunk_size = 49
    frames = []
    for start in range(0, len(var), chunk_size):
        var_chunk = var[start:(start + chunk_size)]
        params = {'get': ','.join(['NAME'] + var_chunk)}
        params.update(georequest)
        if key is not None: params.update({'key': key})
        rows = _download(src + tablepath, year, params, endpt=endpt)
        # First row is the header; skip responses that carry no data rows.
        if len(rows) > 1:
            frames.append(pd.DataFrame(rows[1:], columns=rows[0]))
    if not frames:
        # Nothing to merge: reduce() over an empty list would raise TypeError.
        empty = pd.DataFrame(columns=var)
        empty.index.name = 'index'
        return empty
    geodata = reduce(lambda df1, df2: pd.merge(df1, df2, how='outer', on=lint(list(df1), list(df2))), frames)
    requested = set(var)
    # Columns that were not requested are the geography name and geographic identifiers.
    geo_cols = [c for c in list(geodata) if c not in requested and c != 'NAME']
    geodata['index'] = geodata.apply(lambda x: censusgeo([(gid, x[gid] if (x[gid] is not None) else '') for gid in geo_cols], x['NAME']), axis=1)
    geodata.set_index('index', inplace=True)
    for v in var:
        # Values arrive as strings (or null); convert to numbers with NaN for missing.
        geodata[v] = pd.to_numeric(geodata[v].fillna(np.nan))
    return geodata[var]


class censusgeo:
    """Class for representing Census geographies.

    Two objects compare equal (and hash equally) when their geographic components match;
    the optional name is not part of the comparison.

    Args:
        geo (tuple of 2-tuples of strings): Tuple of 2-tuples of the form (geographic component, identifier), where geographic component is a string (e.g., 'state') and
            identifier is either a numeric code (e.g., '01') or a wildcard ('*'). These identify the geography in question.
        name (str, optional): Name of geography (e.g., 'Alabama').

    Examples::

        censusgeo([('state', '06'), ('place', '53000')], 'Oakland city, California') # Represents the Census geography for Oakland city, California.
        censusgeo([('state', '17'), ('county', '031')]) # Represents the Census geography for Cook County, Illinois.
    """

    #: dict: Census summary level codes for different types of geography
    sumleveldict = {
        'us': '010',
        'region': '020',
        'division': '030',
        'state': '040',
        'state> county': '050',
        'state> county> county subdivision': '060',
        'state> county> county subdivision> subminor civil division': '067',
        'state> county> county subdivision> place/remainder (or part)': '070',
        # NOTE(review): this key contains 'place > tract' (space before '>'), while hierarchy()
        # joins component names with '> '. It looks unreachable unless a component name ends
        # in a space -- confirm the intended spelling before changing it.
        'state> county> county subdivision> place > tract (or part)': '080',
        'state> county> tract> block': '101',
        'state> county> tract': '140',
        'state> county> tract> block group': '150',
        'state> place> county (or part)': '155',
        'state> place': '160',
        'state> consolidated city': '170',
        'state> consolidated city> place (or part)': '172',
        'state> alaska native regional corporation': '230',
        'american indian area/alaska native area/hawaiian home land': '250',
        'american indian area/alaska native area/hawaiian home land> tribal subdivision/remainder': '251',
        'american indian area/alaska native area (reservation or statistical entity only)': '252',
        'american indian area (off-reservation trust land only)/hawaiian home land': '254',
        'american indian area/alaska native area/hawaiian home land> tribal census tract': '256',
        'american indian area/alaska native area/hawaiian home land> tribal census tract> tribal block group': '258',
        'american indian area/alaska native area/hawaiian home land> state': '260',
        'american indian area/alaska native area/hawaiian home land> state> place/remainder': '269',
        'american indian area/alaska native area/hawaiian home land> state> county': '270',
        'state> american indian area/alaska native area/hawaiian home land (or part)': '280',
        'state> american indian area> tribal subdivision/remainder (or part)': '281',
        'state> american indian area/alaska native area (reservation or statistical entity only) (or part)': '283',
        'state> american indian area (off-reservation trust land only)/hawaiian home land (or part)': '286',
        'american indian area/alaska native area/hawaiian home land> tribal subdivision/remainder> state': '290',
        'american indian area/alaska native area/hawaiian home land> tribal census tract (or part) within aia (reservation only)': '291',
        'american indian area/alaska native area/hawaiian home land> tribal census tract (or part) within aia (trust land only)': '292',
        'american indian area/alaska native area/hawaiian home land> tribal census tract> tribal block group (or part) within tribal census tract within aia (reservation only)': '293',
        'american indian area/alaska native area/hawaiian home land> tribal census tract> tribal block group (or part) within tribal census tract within aia (trust land only)': '294',
        'metropolitan statistical area/micropolitan statistical area': '310',
        'metropolitan statistical area/micropolitan statistical area> state': '311',
        'metropolitan statistical area/micropolitan statistical area> state> principal city': '312',
        'metropolitan statistical area/micropolitan statistical area> metropolitan division': '314',
        'metropolitan statistical area> metropolitan division> state': '315',
        'state> metropolitan statistical area/micropolitan statistical area (or part)': '320',
        'state> metropolitan statistical area/micropolitan statistical area> principal city (or part)': '321',
        'state> metropolitan statistical area/micropolitan statistical area> county': '322',
        'state> metropolitan statistical area/micropolitan statistical area> metropolitan division (or part)': '323',
        'state> metropolitan statistical area/micropolitan statistical area> metropolitan division> county': '324',
        'combined statistical area': '330',
        'combined statistical area> state': '331',
        'combined statistical area> micropolitan statistical area': '332',
        'combined statistical area> metropolitan statistical area/micropolitan statistical area> state': '333',
        'combined new england city and town area': '335',
        'combined new england city and town area> state': '336',
        'combined new england city and town area> new england city and town area': '337',
        'combined new england city and town area> new england city and town area> state': '338',
        'state> combined statistical area (or part)': '340',
        'state> combined statistical area> metropolitan statistical area/micropolitan statistical area (or part)': '341',
        'state> combined new england city and town area (or part)': '345',
        'state> combined new england city and town area> new england city and town area (or part)': '346',
        'new england city and town area': '350',
        'new england city and town area> state': '351',
        'new england city and town area> state> principal city': '352',
        'new england city and town area> necta division': '355',
        'new england city and town area> necta division> state': '356',
        'state> new england city and town area (or part)': '360',
        'state> new england city and town area> place': '361',
        'state> new england city and town area> county (or part)': '362',
        'state> new england city and town area> county> county subdivision': '363',
        'state> new england city and town area> necta division (or part)': '364',
        'state> new england city and town area> necta division> county (or part)': '365',
        'state> new england city and town area> necta division> county> county subdivision': '366',
        'urban area': '400',
        'urban area> state': '410',
        'urban area> state> county': '430',
        'state> congressional district': '500',
        'state> congressional district> county': '510',
        'state> congressional district> county> tract': '511',
        'state> congressional district> county> county subdivision': '521',
        'state> congressional district> place': '531',
        'state> congressional district> american indian area/alaska native area/hawaiian home land': '550',
        'state> congressional district> alaska native regional corporation': '560',
        'state> state legislative district (upper chamber)': '610',
        'state> state legislative district (upper chamber)> county': '612',
        'state> state legislative district (lower chamber)': '620',
        'state> state legislative district (lower chamber)> county (or part)': '622',
        'state> public use microdata area': '795',
        'zip code tabulation area': '860',
        'state> zip code tabulation area (or part)': '871',
        'state> school district (elementary)': '950',
        'state> school district (secondary)': '960',
        'state> school district (unified)': '970',
    }

    def __init__(self, geo, name=''):
        # Stored as a tuple so the object can be hashed and used as an index value.
        self.geo = tuple(geo)
        self.name = name

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of failing on a
        # missing `.geo` attribute (e.g. when compared with None or a string).
        if not isinstance(other, censusgeo):
            return NotImplemented
        return self.geo == other.geo

    def __ne__(self, other):
        # Spelled out so that != stays the inverse of == on Python 2 as well.
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        return hash(self.geo)

    def __repr__(self):
        if self.name == '':
            return 'censusgeo({0})'.format(repr(self.geo))
        return "censusgeo({0}, {1})".format(repr(self.geo), repr(self.name))

    def __str__(self):
        # The name, when present, is prepended to the summary-level description.
        prefix = '' if self.name == '' else self.name + ': '
        components = '> '.join([geo[0] + ':' + geo[1] for geo in self.geo])
        return prefix + 'Summary level: ' + self.sumlevel() + ', ' + components

    def params(self):
        """Geographic parameters of this object.

        Returns:
            tuple: Tuple representing the geography hierarchy. Can be used as argument in creating new censusgeo object.

        Examples::

            g = censusdata.censusgeo([('state', '06'), ('place', '53000')])
            g.params() # returns (('state', '06'), ('place', '53000'))
        """
        return self.geo

    def hierarchy(self):
        """Geography hierarchy for the geographic level of this object.

        Returns:
            str: String representing the geography hierarchy (e.g., 'state> county')."""
        return '> '.join([geo[0] for geo in self.geo])

    def sumlevel(self):
        """Summary level code for the geographic level of this object.

        Returns:
            str: String representing the summary level code for this object's geographic level, e.g., '050' for 'state> county'.
            'unknown' if the hierarchy is not listed in `sumleveldict`."""
        return self.sumleveldict.get(self.hierarchy(), 'unknown')

    def request(self):
        """Generate geographic parameters for Census API request.

        Spaces in component names are replaced with '+'. The last component becomes the
        'for' clause; any preceding components are joined with '+' into the 'in' clause.

        Returns:
            dict: Dictionary with appropriate 'for' and, if needed, 'in' parameters for Census API request."""
        nospacegeo = [(geo[0].replace(' ', '+'), geo[1]) for geo in self.geo]
        if len(nospacegeo) > 1:
            result = {'for': ':'.join(nospacegeo[-1]),
            'in': '+'.join([':'.join(geo) for geo in nospacegeo[:-1]])}
        else:
            result = {'for': ':'.join(nospacegeo[0])}
        return result


def censusvar(src, year, var):
    """Download information on a list of variables from Census API.

    Args:
        src (str): Census data source: 'acs1' for ACS 1-year estimates, 'acs5' for ACS 5-year estimates, 'acs3' for
            ACS 3-year estimates, 'acsse' for ACS 1-year supplemental estimates, 'sf1' for SF1 data.
        year (int): Year of data.
        var (list of str): Names of Census variable.

    Returns:
        dict: Maps each variable name in `var` to a three-item list
            [concept (overall concept the variable falls under), label (variable label),
            predicateType (variable type)]. Concept and predicateType are '' when the API
            does not provide them.

    Raises:
        AssertionError: If `src` is not a recognised source, or the returned metadata's
            "name" does not match the requested variable.
        ValueError: If a variable's table type cannot be inferred from its name, or the
            response is not valid JSON.
        KeyError: If the returned metadata has no "label".

    Examples::

        censusdata.censusvar('sf1', 2010, ['P0010001']) # Returns information on the variable P0010001 from the 2010 Census SF1.
    """
    # Raised explicitly (rather than via `assert`) so the check survives `python -O`;
    # the exception type is kept for callers that already catch AssertionError.
    if src not in ('acs1', 'acs3', 'acs5', 'acsse', 'sf1'):
        raise AssertionError(u'Unknown Census data source {0!r}!'.format(src))
    # The endpoint prefix depends only on the source and year, so compute it once.
    if (src == 'acs1' or src == 'acs5' or src == 'acsse') and year >= 2010: presrc = 'acs/'
    elif src == 'sf1': presrc = 'dec/'
    else: presrc = ''
    optionalKeys = ('attributes', 'concept', 'predicateType')
    expectedKeys = ['group', 'label', 'limit', 'name',]
    ret = dict()
    for v in var:
        # The table type is inferred from the variable-name prefix; 'CP' must be tested
        # before the bare 'C' prefix.
        if src == 'acsse' or src == 'sf1' or v.startswith('B'):
            tabletype = ''
        elif v.startswith('S'):
            tabletype = 'subject/'
        elif v.startswith('DP'):
            tabletype = 'profile/'
        elif v.startswith('CP'):
            tabletype = 'cprofile/'
        elif v.startswith('C'):
            tabletype = ''
        else:
            raise ValueError(u'Unknown table type for variable {0}!'.format(v))
        r = requests.get('https://api.census.gov/data/{year}/{presrc}{src}/{tabletype}variables/{v}.json'.format(src=src, year=year, v=v, tabletype=tabletype, presrc=presrc))
        try:
            data = r.json()
        except ValueError:
            raise ValueError(u'Unexpected response (URL: {0.url}): {0.text} '.format(r))
        if 'name' not in data or data['name'] != v:
            raise AssertionError(u'JSON variable information does not include key "name"', data)
        # Unexpected key sets are reported but not fatal.
        if [k for k in sorted(data.keys()) if k not in optionalKeys] != expectedKeys:
            print(u'JSON variable information does not include expected keys ({0} and possibly attributes, concept, predicateType) or includes extra keys: '.format(expectedKeys), data)
        try:
            ret[v] = [data.get('concept', ''), data['label'], data.get('predicateType', '')] # Concept, predicate type not provided for all years; default to empty if not provided
        except KeyError:
            raise KeyError(u'JSON variable information does not include expected keys: ', data)
    return ret
